“Files Used:”
Z:/COVID-19_WastewaterAnalysis/data/processed/MMSD_Interceptor_Cases_2_7_22.csv
Z:/COVID-19_WastewaterAnalysis/data/processed/LIMSWasteData_02-09-22.csv
MadDF <- filter(FullDF,Site=="Madison")%>%
mutate(FlagedOutliers = IdentifyOutliers(N1,Bin = 21, Action = "Flag"),
#Manual flagging that method misses due to boundary effect with binning
FlagedOutliers = ifelse(Date == mdy("01/26/2021") | Date == mdy("01/27/2021"),
TRUE, FlagedOutliers),
NoOutlierVar = ifelse(FlagedOutliers, NA, N1))
OutlierDF <- MadDF%>%
filter(FlagedOutliers)
FullDFMassRatio <- FullDF%>%
filter(!is.na(N1))%>%
mutate(SC2_mass = (3.785*1e6)*FlowRate*N1)
MissingIntercepter <- FullDFMassRatio%>%
filter(Site!="Madison",!is.na(FlowRate)|!is.na(N1))%>%
group_by(Date)%>%
summarize(N = n())%>%
filter(N!=5)
FullDFMassRatio.Mad <- FullDFMassRatio%>%
filter(!is.na(SC2_mass))%>%
filter(Site=="Madison")
FullDFMassRatio.Inter <- FullDFMassRatio%>%
filter(!is.na(SC2_mass))%>%
filter(Site!="Madison")#TempErrorMetric
TempErrorMetric <- FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(InterSum = sum(SC2_mass),InterSumFlow = sum(FlowRate))%>%
inner_join(FullDFMassRatio.Mad)%>%
mutate(MassRatio = SC2_mass/InterSum,
ErrorRatio = 2*(SC2_mass-InterSum)/(SC2_mass+InterSum),
MassRatioFlow = FlowRate/InterSumFlow,
ErrorRatioFlow = 2*(FlowRate-InterSumFlow)/(FlowRate+InterSumFlow))
FullDFMassRatio.Mad <- FullDFMassRatio%>%
filter(!is.na(SC2_mass))%>%
filter(Site=="Madison")%>%
inner_join(FullDFMassRatio.Inter["Date"])%>%
unique()
FullDFMassRatio.Inter <- FullDFMassRatio%>%
filter(Site!="Madison")
KeyOulierPoints <- c(ymd("2021-06-08"),
ymd("2021-10-17"),
ymd("2021-05-02"),
ymd("2021-01-26"))
NonOuliersDF <- MadDF%>%
mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
mutate(N1 = NoOutlierVar)%>%
filter(!(is.na(N1)))
OutLierPlotDF <- MadDF%>%
mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
mutate(N1 = NoOutlierVar)%>%
filter(!(is.na(N1)&is.na(Outlier)))%>%
ggplot(aes(x=Date))+#Data depends on time
geom_line(aes(y=N1,
color="N1",
info = N1),data=NonOuliersDF)+#compares N1
geom_point(aes(y=Outlier,
color="N1 Outlier",
info = Outlier))+
theme_light() +
scale_color_manual(values=c("#F8766D","#999999"))
ggplotly(OutLierPlotDF)
#"2021-06-08","2021-10-17","2021-05-02","2021-01-26"
Sum of interceptor flows agrees with composite flow except for dates with missing flow information for some interceptors. Purple boxes beneath data are to communicate the Dates with interceptor data partially missing.
MissingFlowData <- FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(n=n())%>%
filter(n!=5)%>%
mutate(MissingSite=TRUE)
FlowPlot <- FullDFMassRatio.Inter%>%
full_join(MissingFlowData)%>%
mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
filter(!is.na(FlowRate))%>%
ggplot()+
geom_col(aes(x=Date,y=FlowRate,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) +
geom_col(aes(x=Date,y=-.5),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
scale_fill_brewer(palette="Spectral") +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
theme_light() +
geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=FlowRate),size=2)+
ylab("Average daily flow (MGD)") +
geom_hline(yintercept=median(FullDFMassRatio.Mad$FlowRate), linetype = "dashed", color="black") +
ggtitle("Average daily flow (MMSD)")
ggplotly(FlowPlot)%>%
add_annotations( text="Site, Full Data For Day", xref="paper", yref="paper",
x=1.02, xanchor="left",
y=0.8, yanchor="bottom", # Same y as legend below
legendtitle=TRUE, showarrow=FALSE ) %>%
layout( legend=list(y=0.8, yanchor="top" ) )
MassBalencePlot <- FullDFMassRatio.Inter%>%
full_join(MissingFlowData)%>%
mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
#filter(!is.na(FlowRate))%>%
#filter(!MissingSite)%>%
ggplot()+
geom_col(aes(x=Date,y=SC2_mass,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) +
geom_col(aes(x=Date,y=-3e12),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
scale_fill_brewer(palette="Spectral") +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
theme_light() +
geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=SC2_mass),size=1)+
ylab("N1 gene copies per day") +
ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
ggplotly(MassBalencePlot)
InterceptRatioPlot <- FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(SC2_InterSum = sum(SC2_mass,na.rm = TRUE),
nIntercepter = n())%>%
inner_join(FullDFMassRatio)%>%
mutate(Ratio = (SC2_mass/SC2_InterSum)*(nIntercepter/5))%>%
arrange(Ratio)%>%
select(Ratio,nIntercepter, everything())%>%
ggplot(aes(x=Date))+
geom_point(aes(y= Ratio,color = Site))
#ggplotly(InterceptRatioPlot)
FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(SC2_InterSum = sum(SC2_mass,na.rm = TRUE),
nIntercepter = n())%>%
inner_join(FullDFMassRatio.Mad)%>%
mutate(Ratio = (SC2_mass/SC2_InterSum)*(nIntercepter/5))%>%
arrange(Ratio)%>%
select(Ratio,nIntercepter, everything())#%>%
## # A tibble: 104 x 16
## Ratio nIntercepter Date SC2_InterSum Site FirstConfirmed
## <dbl> <int> <date> <dbl> <chr> <int>
## 1 0.184 3 2021-06-17 7.19e12 Madison NA
## 2 0.195 4 2021-06-28 7.44e12 Madison NA
## 3 0.279 4 2021-05-27 2.82e13 Madison NA
## 4 0.289 4 2021-07-01 4.00e12 Madison 6
## 5 0.301 5 2021-07-19 4.63e12 Madison 17
## 6 0.313 5 2021-05-20 9.76e13 Madison 18
## 7 0.402 5 2021-07-22 2.62e13 Madison 33
## 8 0.406 5 2021-04-22 7.19e12 Madison 53
## 9 0.409 5 2021-03-11 1.55e13 Madison 50
## 10 0.473 5 2021-04-05 2.54e13 Madison 29
## # ... with 94 more rows, and 10 more variables: SevenDayMACases <dbl>,
## # N1 <dbl>, BCoV <dbl>, N2 <dbl>, PMMoV <dbl>, GeoMeanN12 <dbl>,
## # FlowRate <dbl>, temperature <dbl>, equiv_sewage_amt <dbl>, SC2_mass <dbl>
#summarise(mean(Ratio))
SizeUsed=.5
alphaUsed=.9
IntercepterDF <- FullDF %>%
filter(Site != "Madison") #We are looking for agreement between the interceptors
#after some normalization so Madison just distracts
IntercepterOverLay <- IntercepterDF%>%
ggplot(aes(x=Date))+
geom_point(aes(y=N1,color = Site),size = SizeUsed, alpha= alphaUsed,shape=15)+
geom_point(aes(y=N2/10000,color = Site),size = SizeUsed, alpha= alphaUsed,shape=19)+
scale_y_log10()
ggplotly(IntercepterOverLay)
IntercepterOverLayFlow <- IntercepterDF%>%
ggplot(aes(x=Date))+
geom_point(aes(y=N1*FlowRate,color = Site),size = SizeUsed, alpha= alphaUsed,shape=15)+
geom_point(aes(y=N2*FlowRate/10000,color = Site),size = SizeUsed, alpha= alphaUsed,shape=19)+
scale_y_log10()
ggplotly(IntercepterOverLayFlow)
IntercepterOverLayPop <- IntercepterDF%>%
mutate(Pop = case_when(
Site=="MMSD-P2" ~ 111967,
Site=="MMSD-P7" ~ 81977,
Site=="MMSD-P8" ~ 127634,
Site=="MMSD-P11" ~ 130799,
Site=="MMSD-P18" ~ 151470,
Site=="Madison" ~ 603847,
))%>%
ggplot(aes(x=Date))+
geom_point(aes(y=N1/Pop,color = Site),size = SizeUsed, alpha= alphaUsed,shape=15)+
geom_point(aes(y=N2/(FlowRate*1e8),color = Site),size = SizeUsed, alpha= alphaUsed,shape=19)+
scale_y_log10()
ggplotly(IntercepterOverLayPop)
InterceptRatioDF <- FullDF%>%
filter(Site!="Madison")%>%
group_by(Date)%>%
filter(!is.na(N1))%>%
summarise(SC2_InterSum_N1 = sum(N1*FlowRate,na.rm = TRUE),
SC2_InterSum_N2 = sum(N2*FlowRate,na.rm = TRUE),
nIntercepter = n())%>%
full_join(FullDF)%>%
filter(!is.na(N1))%>%
mutate(RatioN1 = (N1*FlowRate/SC2_InterSum_N1)*(nIntercepter/5),
RatioN2 = (N2*FlowRate/SC2_InterSum_N2)*(nIntercepter/5))%>%
select(RatioN1,RatioN2,nIntercepter, everything())
InterceptRatioPlot <- InterceptRatioDF%>%
ggplot(aes(x=Date))+
geom_point(aes(y=RatioN1,color = Site),size = SizeUsed, alpha= alphaUsed,shape=15)+
geom_point(aes(y=RatioN2-5,color = Site),size = SizeUsed, alpha= alphaUsed,shape=19)
ggplotly(InterceptRatioPlot)
InterceptRatioPlotFlow <- InterceptRatioDF%>%
ggplot(aes(x=Date))+
geom_point(aes(y=RatioN1/FlowRate,color = Site),size = SizeUsed, alpha= alphaUsed,shape=15)+
geom_point(aes(y=RatioN2/FlowRate-.3,color = Site),size = SizeUsed, alpha= alphaUsed,shape=19)
ggplotly(InterceptRatioPlot)
InterceptRatioPlotPop <- InterceptRatioDF%>%
mutate(Pop = case_when(
Site=="MMSD-P2" ~ 111967,
Site=="MMSD-P7" ~ 81977,
Site=="MMSD-P8" ~ 127634,
Site=="MMSD-P11" ~ 130799,
Site=="MMSD-P18" ~ 151470,
Site=="Madison" ~ 603847,
))%>%
ggplot(aes(x=Date))+
geom_point(aes(y=RatioN1/Pop,color = Site),size = SizeUsed, alpha= alphaUsed,shape=15)+
geom_point(aes(y=RatioN2/Pop-.3,color = Site),size = SizeUsed, alpha= alphaUsed,shape=19)
ggplotly(InterceptRatioPlot)
IntercepterOverLay <- FullDFMassRatio.Inter%>%
complete(Date,Site)%>%
full_join(FullDFMassRatio)%>%
ggplot(aes(x=Date))+
geom_point(aes(y=N1,color = Site),alpha=.8)+
scale_y_log10()
ggplotly(IntercepterOverLay)
N1BalencePlot <- FullDFMassRatio.Inter%>%
complete(Date,Site)%>%
ggplot()+
geom_col(aes(x=Date,y=N1,fill=Site),
position = "dodge", stat="identity", width = 2) +
geom_col(aes(x=Date,y=-3e3),
data=MissingFlowData,color="Purple",fill="Purple", width = 2)+
scale_fill_brewer(palette="Spectral") +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
theme_light() +
geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=N1),size=1)+
scale_y_log10()+
ylab("N1 gene copies per day") +
ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
ggplotly(N1BalencePlot)
FullDFMassRatio.Inter%>%
group_by(Site)%>%
summarise(N1 = mean(N1,na.rm=TRUE),
Flow = mean(FlowRate,na.rm=TRUE),
SC2 = mean(SC2_mass,na.rm=TRUE),
AntiSC2 = mean(N1/FlowRate,na.rm=TRUE))#%>%
## # A tibble: 5 x 5
## Site N1 Flow SC2 AntiSC2
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 MMSD-P11 294443. 8.58 9.04e12 34459.
## 2 MMSD-P18 403384. 11.2 1.69e13 36331.
## 3 MMSD-P2 313275. 5.85 6.57e12 53950.
## 4 MMSD-P7 356002. 4.40 5.86e12 87100.
## 5 MMSD-P8 383066. 6.27 8.66e12 61481.
#ungroup()%>%
#summarise(VarN1 = var(N1)/mean(N1)^2,
# VarFlow = var(Flow)/mean(Flow)^2,
# VarSC2 = var(SC2)/mean(SC2)^2,
# VarASC2 = var(AntiSC2)/mean(AntiSC2)^2)
#Graph paportion of data intercepters are
arranged plots with lines to communicate where flagged outliers are. Red Lines show Where there was a flagged on a day that also has collected interceptor data.
StartRange <- mdy("1-20-2021")
EndRange <- mdy("1-10-2022")
AllOuliersDates <- OutlierDF[["Date"]]
OutlierDatesIntercepters <- unique(inner_join(FullDFMassRatio.Inter,OutlierDF["Date"],by=c("Date"))$Date)
a <- OutLierPlotDF+
scale_x_date(limits = c(StartRange,EndRange))+
#geom_vline(xintercept = AllOuliersDates)+
geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
#geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
theme(title = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position="none")
b <- MassBalencePlot+
scale_x_date(limits = c(StartRange,EndRange))+
#geom_vline(xintercept = (AllOuliersDates))+
geom_vline(xintercept = (OutlierDatesIntercepters),color="Red")+
#geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
theme(title = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position="none")
c <- FlowPlot+
scale_x_date(limits = c(StartRange,EndRange))+
#geom_vline(xintercept = AllOuliersDates)+
geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
theme(title = element_blank(),
legend.position="none")
ggarrange(a,b,c,nrow=3,align ="v",legend.grob=get_legend(MassBalencePlot),legend="right")
#Cause of missing data
OutlierDatesIntercepters
## [1] "2021-04-15" "2021-04-26" "2021-07-26" "2021-12-13"
N_One_Outliers <- c(ymd("2021-04-15"), ymd("2021-04-26"), ymd("2021-07-26"), ymd("2021-12-13"))
N_TWO_Outliers <- c(ymd("2021-04-15"), ymd("2021-05-20"))
LowRatioN_One <- c("2021-04-15","2021-07-26","2021-12-13","2021-05-31","2021-03-22")
LowRatioN_TWO <- c("2021-04-15","2021-07-26","2021-12-13","2021-10-04","2021-03-22")
AllIntercepterComp_POI <- c(N_One_Outliers,N_TWO_Outliers,LowRatioN_One,LowRatioN_TWO)
FullDF%>%
filter(Date %in% AllIntercepterComp_POI)%>%
select(-FirstConfirmed,SevenDayMACases)
## Date Site SevenDayMACases N1 BCoV N2 PMMoV
## 1 2021-03-22 MMSD-P11 3.600000 74718 7.233 34380 13995222
## 2 2021-03-22 MMSD-P18 3.600000 87689 9.071 92614 5087805
## 3 2021-03-22 MMSD-P2 6.000000 41663 6.174 21141 12906803
## 4 2021-03-22 MMSD-P7 7.400000 43453 0.741 55333 13088946
## 5 2021-03-22 MMSD-P8 8.600000 51290 5.706 44260 19118442
## 6 2021-04-15 MMSD-P11 11.285714 157424 3.626 126684 45532010
## 7 2021-04-15 MMSD-P18 10.142857 NA NA NA NA
## 8 2021-04-15 MMSD-P2 10.285714 48521 3.533 48080 26162914
## 9 2021-04-15 MMSD-P7 11.428571 NA NA NA NA
## 10 2021-04-15 MMSD-P8 11.857143 60518 5.001 54023 20604026
## 11 2021-04-26 MMSD-P11 4.857143 173731 8.414 121556 32400873
## 12 2021-04-26 MMSD-P18 5.333333 1761146 19.407 2270206 18185986
## 13 2021-04-26 MMSD-P2 5.833333 66599 17.244 84380 169992062
## 14 2021-04-26 MMSD-P7 7.666667 145908 12.930 109214 13107287
## 15 2021-04-26 MMSD-P8 8.166667 181856 11.324 194280 20215977
## 16 2021-05-20 MMSD-P11 2.714286 9997 2.871 29627 66800100
## 17 2021-05-20 MMSD-P18 2.857143 1945496 6.505 2297335 15415932
## 18 2021-05-20 MMSD-P2 3.571429 608010 9.994 676281 44245555
## 19 2021-05-20 MMSD-P7 3.571429 56388 2.936 58965 32473853
## 20 2021-05-20 MMSD-P8 3.000000 15247 5.676 17264 18441997
## 21 2021-05-31 MMSD-P11 1.428571 3972 3.353 NA 31740972
## 22 2021-05-31 MMSD-P18 1.333333 120744 4.302 168461 31643578
## 23 2021-07-26 MMSD-P11 4.428571 56082 1.730 57844 34062331
## 24 2021-07-26 MMSD-P18 3.714286 96259 4.820 65563 11582896
## 25 2021-07-26 MMSD-P2 5.428571 87920 7.879 86185 42669683
## 26 2021-07-26 MMSD-P7 6.285714 22234 8.842 18735 21276643
## 27 2021-07-26 MMSD-P8 7.571429 14820 9.955 17825 24223424
## 28 2021-10-04 MMSD-P11 21.857143 132758 3.220 122553 30769148
## 29 2021-10-04 MMSD-P18 25.142857 100502 0.800 81696 9759807
## 30 2021-10-04 MMSD-P2 26.285714 220932 3.560 170892 13670096
## 31 2021-10-04 MMSD-P7 29.285714 137332 3.210 101734 14498503
## 32 2021-10-04 MMSD-P8 27.857143 172914 1.680 128467 20682700
## 33 2021-12-13 MMSD-P11 19.000000 458847 16.130 326182 86006923
## 34 2021-12-13 MMSD-P18 21.000000 722770 12.720 887123 13380021
## 35 2021-12-13 MMSD-P2 34.000000 429555 2.325 475306 33305288
## 36 2021-12-13 MMSD-P7 49.285714 300888 2.231 499546 31784189
## 37 2021-12-13 MMSD-P8 56.285714 443364 8.883 456116 18625898
## 38 2021-03-22 Madison 35.000000 162650 1.035 121319 16649039
## 39 2021-04-15 Madison 50.285714 571269 3.684 706418 23628196
## 40 2021-04-26 Madison 39.333333 675748 16.079 546554 28619982
## 41 2021-05-20 Madison 19.666667 224021 14.337 285371 49311472
## 42 2021-05-31 Madison 6.000000 147362 3.645 171524 37833422
## 43 2021-07-26 Madison 43.333333 186460 3.589 143034 30230924
## 44 2021-10-04 Madison 90.000000 244225 2.350 283059 12107928
## 45 2021-12-13 Madison 229.333333 1498471 13.886 1515771 24525524
## 46 2021-05-31 MMSD-P2 NA 13114 4.599 15239 29170047
## 47 2021-05-31 MMSD-P7 NA 8117 1.276 NA 30386513
## 48 2021-05-31 MMSD-P8 NA 58037 5.907 46865 19836360
## GeoMeanN12 FlowRate temperature equiv_sewage_amt
## 1 74718 9.02 NA 1.000
## 2 87689 11.52 NA 1.000
## 3 41663 5.81 NA 1.000
## 4 43453 3.67 NA 1.000
## 5 51290 6.18 NA 1.000
## 6 157424 8.91 NA 1.000
## 7 NA NA NA NA
## 8 48521 6.33 NA 1.000
## 9 NA NA NA NA
## 10 60518 6.78 NA 1.000
## 11 173731 8.62 NA 0.250
## 12 1761146 10.79 NA 0.250
## 13 66599 5.84 NA 0.250
## 14 145908 4.41 NA 0.250
## 15 181856 6.38 NA 0.250
## 16 9997 8.61 NA 0.625
## 17 1945496 11.32 NA 0.625
## 18 608010 5.52 NA 0.625
## 19 56388 3.65 NA 0.625
## 20 15247 6.88 NA 0.625
## 21 3972 7.88 NA 0.625
## 22 120744 9.98 NA 0.625
## 23 56082 8.44 NA 1.000
## 24 96259 10.28 NA 1.000
## 25 87920 5.74 NA 1.000
## 26 22234 4.52 NA 1.000
## 27 14820 5.95 NA 1.000
## 28 132758 8.47 NA 0.625
## 29 100502 10.08 NA 0.625
## 30 220932 6.74 NA 0.625
## 31 137332 4.35 NA 0.625
## 32 172914 6.27 NA 0.625
## 33 458847 8.16 NA 0.625
## 34 722770 10.74 NA 0.625
## 35 429555 6.14 NA 0.625
## 36 300888 4.65 NA 0.625
## 37 443364 6.09 NA 0.625
## 38 162650 36.20 NA 1.000
## 39 571269 39.02 NA 1.000
## 40 675748 36.04 NA 0.250
## 41 224021 35.98 NA 0.625
## 42 147362 32.02 NA 0.625
## 43 186460 34.93 NA 1.000
## 44 244225 35.91 NA 0.625
## 45 1498471 35.78 NA 0.625
## 46 13114 4.74 NA 0.625
## 47 8117 3.91 NA 0.625
## 48 58037 5.51 NA 0.625